#####################################################################
# Script to take a series of orbital stock levels and object properties, and calibrate a physical model of debris evolution and collision risk. The scenarios are orchestrated from the shell script "figure-9.0--run-calibrated-simulation.sh".
#####################################################################

total_sim_time_start <- proc.time()[3]

library(tidyverse)
library(stargazer)
library(data.table)
library(glmnet)
library(gridExtra)
library(grid)
library(patchwork)
library(condMVNorm)
library(purrr)
library(viridis)
# library(survival)
# library(survminer)
library(Rearrangement)
# library(furrr)
# library(truncnorm)
library(doParallel)
library(doRNG)
library(tictoc)
library(nleqslv)
source("plotting_functions.r")
source("calibration_functions.r")
setwd("../data/calibration-data/")

tic()
seednum <- 501
set.seed(seednum)

scriptargs <- commandArgs(trailingOnly = TRUE)

#####
# Read in data
#####

residence_times <- read.csv("residence_time_shells.csv")
object_chars <- read.csv("pib_object_properties.csv")
lifetimes <- read.csv("decays_summary.csv")
# mass_flux <- read.csv("mass_distribution.csv")

stock_flow_series <- read.csv("esa_sector-year-shell_panel.csv")

#####
# Key economic parameters for simulation: growth rate and elasticity
#####

elasticity_distribution <- c(0, -0.1, -0.2)
draws_vector <- seq_along(elasticity_distribution) # vector of indices to draw from

growth_rate_returns <- as.numeric(scriptargs[1]) # command line input
growth_rate_costs <- 0.025 # historical aggregate cost growth over 2006-2020

### Why don't we just estimate the elasticity? Because the orbital occupancy elasticity of satellite payoffs is not identified from these data. In particular, we can't distinguish between potential economies of scale (returns to scale from constellations, greater availability of SSA and collision avoidance free-riding, positive parameter) and diseconomies of scale (market competition/spectrum congestion, negative parameter).
### Why do we use the total operator revenues? Because we're interested in the growth of the *space economy* (for comparability to the MS/GS/etc projections), not the growth of this particular sector.
### The path taken here is to simply set the mean elasticity to 0, assume a Normal distribution with standard deviation 0.1, and draw values. We assume the total sector revenues and costs grow at 3% each to isolate the effect of the occupancy elasticity.

#####
# Simulation hyperparameters
#####

# parallelization settings
ncores <- min(length(elasticity_distribution), 60) # number of cores to use for parallelization
message(ncores, " cores available for parallelization.")

n_years <- 1000 # number of years of simulation
n_sims <- as.numeric(scriptargs[2]) # number of simulations to do. each simulation should have a different elasticity.
set.seed(seednum)

#####
# Constants
#####

shell_lower <- 600 # lower bound of shell of interest
shell_upper <- 650 # upper bound of shell of interest

shell_of_interest <- c(shell_lower,shell_upper)
avoidance_fraction_ss <- 0.99 # assume 99% of all potential collisions between satellites are successfully avoided
avoidance_fraction_sd <- 0.95 # assume 95% of all potential collisions between satellites and debris are successfully avoided
avoidance_rates <- data.frame(avoidance_fraction_ss = avoidance_fraction_ss, avoidance_fraction_sd = avoidance_fraction_sd)

mean_sat_lifetime <- mean(lifetimes$mean_observed_lifetime)
mean_sat_residence <- 1 - 1/mean_sat_lifetime
mu <- 3.986004415e+5 # gravitational constant in units of km^3/s^2

#####
# Set targets, construct intrinsic probability coefficients
#####

# Filter for shell of interest
mean_altitude <- mean(shell_of_interest)
target_series <- stock_flow_series %>% 
	group_by(year) %>% mutate(agg_sats = sum(total_sats)) %>% ungroup(year) %>%
	filter(shell_index_continuous <= max(shell_of_interest) & shell_index_continuous >= min(shell_of_interest))

### Construct object-type time series
dfrm <- make_objecttype_series(target_series)

### Calculate shell objects as share of total
dfrm <- dfrm %>% mutate(share = sats/agg_sats)

### Calculate decay rate. Compute share of objects in orbit each year with properties of intact of fragment; then compute mean intact and fragment shares over horizon; then use the mean shares to weight the residence times and compute mean-share-weighted residence time.
phys_delta <- pmin(calc_phys_delta(target_series, residence_times),1)

### Calculate object masses using a similar share-weighted approach as above.
avg_masses <- calc_object_masses(target_series, object_chars)
average_sat_mass <- avg_masses$average_sat_mass
average_deb_mass <- avg_masses$average_deb_mass

### Coefficients: each of these is the "intrinsic" probability that an object gets hit by something, so the xsect area should reflect the object getting hit. Then when plugged into the negexp function the number of objects doing the hitting is used to give the other side of it. So then alpha_SS = alpha_SD. This formula is based on a kinetic gas-like model (pigeonhole principle) which assumes objects are moving at random.
alphas <- calc_alphas(object_chars, mean_altitude)

# ### Calculate the number of fragments created based on analytical formula from Letizia thesis -- calibrated in that study to fit NASA standard breakup model
beta_SS <- calc_nfrags(2*average_sat_mass)
beta_SD <- calc_nfrags((average_sat_mass + average_deb_mass))
beta_DD <- calc_nfrags(2*average_deb_mass)

betas <- data.frame(beta_SS = beta_SS, beta_SD = beta_SD, beta_DD = beta_DD)

phys_parm_list <- list(alphas = alphas, betas = betas, avoidance_rates = avoidance_rates, dfrm = dfrm)

phys_outputs <- generate_phys_coefs(phys_parm_list, "ridge")
phys_coefs <- phys_outputs$phys_coefs
L_t <- phys_outputs$L_t

# Reset the beta_DD coefficient if conducting sensitivity analysis over this parameter. Resetting at this stage doesn't cause the remaining beta_xx parameters and m to adjust. This is sufficient for doing a sensitivity analysis over beta_DD without getting into more complicated interactions between the "adjustments to random motion" in the ridge regression.
if(scriptargs[3] != "calibrated") {
	phys_coefs$beta_DD <- as.numeric(scriptargs[3])
}

if(scriptargs[4] != "calibrated") {
	phys_coefs$beta_SD <- as.numeric(scriptargs[4])
}

dfrm_augmented <- data.frame(dfrm, "Collision probability" = L_t)
stargazer(dfrm_augmented, summary=FALSE, digit.separator="", rownames=FALSE)

#####
# Construct economic measures
#####

econ_series_disaggregated <- target_series %>%
	filter(Sector=="commercial") %>% 
	select(year, Satellite.Communications, Earth.Observation, Ground.Stations.and.Equipment, Space.Situational.Awareness, Insurance.Premiums, Satellite.Launch.Industry..Commercial., Satellite.Manufacturing..Commercial.) %>% 
	mutate(
		LEO.Operator.revenues = Satellite.Communications + Earth.Observation, # this is the same as implied_pi_t later
		LEO.operator.costs = Ground.Stations.and.Equipment + Insurance.Premiums + Satellite.Launch.Industry..Commercial. + Satellite.Manufacturing..Commercial. + Space.Situational.Awareness # this is the same as implied_Ft later
		)

stargazer(econ_series_disaggregated, summary=FALSE, digit.separator="", rownames=FALSE)

# build the data series
big_econ_series <- left_join(econ_series_disaggregated, dfrm, by="year")
big_econ_series_oem <- big_econ_series %>% 
	mutate(
		time = year - econ_series_disaggregated$year[1],
		little_pi = LEO.Operator.revenues/sats # no reason to keep this around if it's not used?
)

# Calculate growth rates of total satellite industry revenues and total satellite industry costs

lm(log(LEO.Operator.revenues) ~ time, data=big_econ_series_oem) %>% summary()
lm(log(LEO.operator.costs) ~ time, data=big_econ_series_oem) %>% summary()

#####
# Simulate the path forward
#####

##### The point of this segment is to estimate the adjustment coefficients for the "moment condition". Try doing this without using any kind of "calculated_pi_t"

target_series <- target_series %>% filter(Sector=="commercial")

econ_series_full <- target_series %>% 
mutate(pi_t = Satellite.Communications + Earth.Observation,
	normalized_period = year - target_series$year[1],
	F_t = Ground.Stations.and.Equipment + Insurance.Premiums + Satellite.Launch.Industry..Commercial. + Satellite.Manufacturing..Commercial. + Space.Situational.Awareness,
	L_t = L_t
	) %>% select(year, normalized_period, pi_t, F_t, L_t) 

econ_series <- econ_series_full[-nrow(econ_series_full),]
econ_series <- econ_series %>% 
	mutate(
		r_st = pi_t/F_t, 
		Ft_Ft = c(NA,F_t[-nrow(econ_series)]/F_t[-1])
		) %>% 
		mutate(time = year - year[1])

econrisk_model <- lm(L_t ~ r_st + Ft_Ft, data=econ_series)
summary(econrisk_model)

econ_coefs <- coef(econrisk_model)
stargazer(econrisk_model,type="text")

# write out "adjustment coefficients"
write.csv(econ_coefs,file="calibrated_econrisk_coefs.csv")

## Prepare objects for the parallel part
state <- data.frame(S=target_series$total_sats[nrow(target_series)], D=target_series$total_debris[nrow(target_series)])
econ_vars <- data.frame(intercept = 1, r_st = econ_series$r_st[nrow(econ_series)], Ft_Ft = econ_series$Ft_Ft[nrow(econ_series)]) # The key economic variables in the final year -- this needs to happen after the elasticity is drawn and generate_econ_coefs is called

### Set launch constraint to maximum launches observed to that shell historically
max_observed_launch_rate <- max(dfrm$launches) #molr

message("Constant launch constraint to shell of interest is ", max_observed_launch_rate, " satellites per year.")

list_of_series <- list()

message("######################### \nBeginning forward simulation of orbit use... \n######################### \n")

## Begin the parallel part
cl <- makeCluster(ncores, type="FORK")
registerDoParallel(cl)
registerDoRNG(seednum)
setDefaultCluster(cl=cl)

sim_time_start <- proc.time()[3]

##### Simulate series across elasticity draws in parallel.

list_of_series <- foreach(sim=c(1:length(elasticity_distribution)), .inorder=TRUE) %dopar% {
	draw_idx <- draws_vector[sim] # set the draw index
	elasticity_draw <- elasticity_distribution[draw_idx] # draw the elasticity for this simulation

	# set up the dataframe with the columns to be filled
	simulated_series <- as.data.frame(matrix(nrow=n_years,ncol=16))
	colnames(simulated_series) <- c("sim","elasticity","year","launches","sats","debs","risk","excess_return","pi_t","F_t","r_st","Ft_Ft","FAR","Kessler","Kessler_ind","Kessler_time")

	# initialize params vector
	params <- data.frame(phys_coefs, excess_return = sum(econ_coefs*econ_vars), elasticity = elasticity_draw)

	# initialize dataframe entries. Start from the 2021 values as period 1, and then period 2 is 2022, and so on
	simulated_series$sim <- sim # sim index
	simulated_series$elasticity <- elasticity_draw # sim elasticity
	simulated_series$year <- 2020 + 1:n_years

	simulated_series$launches[1] <- NA # 2021 launch rate -- left NA since it will be updated in the loop
	simulated_series$sats[1] <- S_e_(dfrm$launches[nrow(dfrm)],dfrm$sats[nrow(dfrm)],dfrm$debs[nrow(dfrm)],params) # 2021 satellite stock
	simulated_series$debs[1] <- D_e_(dfrm$launches[nrow(dfrm)],dfrm$sats[nrow(dfrm)],dfrm$debs[nrow(dfrm)],params) # 2021 satellite stock
	simulated_series$risk[1] <- L_e(simulated_series$sats[1], simulated_series$debs[1], params) # 2021 collision risk

	# The pi_t and r_st terms use a decomposition of pi_t into a scalar that grows over time (the vanilla pi_t) and an elasticity component
	S_0 <- dfrm$sats[nrow(dfrm)]
	target_level <- econ_series$pi_t[nrow(econ_series)]
	pi_t_base <- exp(
		log(target_level) - log(1 + elasticity_draw) + elasticity_draw*log(S_0)
	)

	growth_factor_revenues_t <- exp(growth_rate_returns*2)
	# simulated_series$pi_t[1] <- econ_series$pi_t[nrow(econ_series)]*growth_factor_revenues_t*simulated_series$sats[1]^elasticity_draw # projected 2021 total sector revenues ## UPDATE THIS
	simulated_series$pi_t[1] <- pi_t_base*growth_factor_revenues_t*simulated_series$sats[1]^elasticity_draw # projected 2021 total sector revenues, adjusted for elasticity effect in final year


	F_t_base <- econ_series$F_t[nrow(econ_series)]
	growth_factor_costs_t <- exp(growth_rate_costs*2)
	simulated_series$F_t[1] <- F_t_base*growth_factor_costs_t # projected 2021 total sector costs
	simulated_series$r_st[1] <- simulated_series$pi_t[1]/simulated_series$F_t[1] # projected 2021 gross rate of return
	simulated_series$Ft_Ft[1] <- econ_series$F_t[nrow(econ_series)]*(1+growth_rate_costs)/simulated_series$F_t[1] # projected 2021 capital gains for orbit users from cost growth

	# initialize econ_vars_update for use in the loop
	econ_vars_update <- econ_vars
	econ_vars_update$r_st <- simulated_series$r_st[1]
	econ_vars_update$Ft_Ft <- simulated_series$Ft_Ft[1]

	simulated_series$excess_return[1] <- sum(econ_vars_update*econ_coefs) # 2021 excess return

	simulated_series$FAR[1] <- FAR(simulated_series$sats[1], simulated_series$debs[1], params) # 2021 FAR crossing check
	simulated_series$Kessler[1] <- Kessler_condition(simulated_series$debs[1],params) # 2021 Kessler condition check

	# Since the launch rate is based on the next-period stuff, we'll update the previous-period launch rate each iteration. That means the last period (n_years) won't have a launch rate.
	for(period in 2:n_years) {
		message("Beginning simulation ", sim, ", year ", period)

		# Update state, collision probability, and econ_vars_update
		## state
		state <- data.frame(S = simulated_series$sats[period-1], D = simulated_series$debs[period-1])
		## collision prob
		phys_coefs$L_t <- simulated_series$risk[period-1]
		## econ_vars_update. The pi_t here isn't the actual pi_t for this period; it's missing the multiplicative factor of S_t^elasticity. That will be calculated in the equilibrium solver, which needs the econ_coefs and econ_vars as inputs.
		### revenues -- need to separate the pi_t_base from the growth factor and effects of previous period elasticity to avoid serial correlation *due to satelite stocks*
		growth_factor_revenues_t <- growth_factor_revenues_t*exp(growth_rate_returns)
		pi_t_temp <- pi_t_base*growth_factor_revenues_t
		simulated_series$pi_t[period-1]*(1+growth_rate_returns)
		### costs -- keeping this in the simplest form
		simulated_series$F_t[period] <- simulated_series$F_t[period-1]*(1+growth_rate_costs)
		### rates
		econ_vars_update$r_st <- pi_t_temp/simulated_series$F_t[period]
		econ_vars_update$Ft_Ft <- simulated_series$F_t[period-1]/simulated_series$F_t[period]

		# update params vector
		params <- data.frame(phys_coefs, excess_return = sum(econ_coefs*econ_vars), elasticity = elasticity_draw)

		# Calculate period open-access launch rate
		## optim solver. Behaves strangely when corners are optimal.
		optim_result <- optim(
			1, 
			eqm_cond_empirical, 
			lower=0, upper=max_observed_launch_rate, 
			method = c("L-BFGS-B"), 
			econ_coefs = econ_coefs, econ_vars = econ_vars_update, phys_coefs = phys_coefs, state = state, elasticity_draw = elasticity_draw,
			control=list(fnscale=1e-10))
		launch_rate <- optim_result$par
		if(launch_rate<0) launch_rate <- 0

		# update launches in period-1
		simulated_series$launches[period-1] <- launch_rate
		# update states and collision prob for next period
		simulated_series$sats[period] <- S_e_(simulated_series$launches[period-1], simulated_series$sats[period-1], simulated_series$debs[period-1], params)
		simulated_series$debs[period] <- D_e_(simulated_series$launches[period-1], simulated_series$sats[period-1], simulated_series$debs[period-1], params)
		simulated_series$risk[period] <- L_e(simulated_series$sats[period], simulated_series$debs[period], params)

		# update pi_t and other econ variables with actual values
		simulated_series$pi_t[period] <- pi_t_temp*simulated_series$sats[period]^elasticity_draw
		simulated_series$r_st[period] <- simulated_series$pi_t[period]/simulated_series$F_t[period]
		simulated_series$Ft_Ft[period] <- econ_vars_update$Ft_Ft # this could have been done when calculated up above, but keeping it here with its mates
		## calculate excess return
		econ_vars_update$r_st <- simulated_series$r_st[period]
		simulated_series$excess_return[period] <- sum(econ_vars_update*econ_coefs)

		# Update FAR and other Kessler objects 
		simulated_series$FAR[period] <- FAR(simulated_series$sats[period], simulated_series$debs[period], params) # period FAR crossing check
		# simulated_series$Kessler[period] <- Kessler_condition(simulated_series$sats[period], simulated_series$debs[period], params) # period Kessler condition check
		simulated_series$Kessler[period] <- Kessler_condition(simulated_series$debs[period],params) # Check whether "Kessler_condition" returns TRUE
	}

	## Calculate Kessler time
	# simulated_series$Kessler_time <- simulated_series$year[min(which(simulated_series$Kessler_ind==TRUE))]
	# Compute first time that Kessler condition becomes 0; gets rid of initial condition effects -- OLD HACK
	Kessler_restore <- c(NA,diff(simulated_series$Kessler>0))
	Kessler_restore_time <- which(Kessler_restore==-1) + 2020
	if(length(Kessler_restore_time)==0) Kessler_restore_time <- Inf
	simulated_series$Kessler_ind <- as.numeric((simulated_series$Kessler>0)*(simulated_series$year>Kessler_restore_time))
	# Compute Kessler time from simulated_series$Kessler_ind
	simulated_series$Kessler_time <- simulated_series$year[min(which(simulated_series$Kessler_ind>=1))]

	simulated_series
}


stopCluster(cl)
sim_time_end <- proc.time()[3]

message("Time for simulation: ", round(sim_time_end - sim_time_start, 3), " seconds. Time per core: ", round((sim_time_end - sim_time_start)/ncores, 3), " seconds/core.")


toc()

sim_results_big <- rbindlist(list_of_series)
head(sim_results_big)

sim_results_big$Kessler_ind <- as.numeric(sim_results_big$Kessler>0)
# Compute Kessler time from simulated_series$Kessler_ind
# sim_results_big$Kessler_time <- sim_results_big$year[min(which(sim_results_big$Kessler_ind>=1))]

write.csv(sim_results_big, file=paste0("calibrated-sim-output-data/sim_results_big--",growth_rate_returns,"-",growth_rate_costs,"-",as.integer(phys_coefs$beta_DD),"-",as.integer(phys_coefs$beta_SD),".csv"))

sim_results_big_plotbase <- ggplot(data = sim_results_big, aes(x=year, group=elasticity, color=elasticity)) + 
	theme_bw() + 
	labs(x = "Year", y="", group="Elasticity") + 
	theme(text = element_text(size=16)) + 
	scale_color_viridis()

sim_results_plot_sats <- sim_results_big_plotbase + geom_line(aes(y=sats), size=1) + ylab("Satellites") + ggtitle("Satellites") + ylab("Satellites")
sim_results_plot_debs <- sim_results_big_plotbase + geom_line(aes(y=debs), size=1) + ylab("Debris") + ggtitle("Debris") + ylab("Debris") + ylim(c(0,1e5))
sim_results_plot_launches <- sim_results_big_plotbase + geom_line(aes(y=launches), size=1) + ylab("Launches") + ggtitle("Launches") + ylab("Satellites")
sim_results_plot_risk <- sim_results_big_plotbase + geom_line(aes(y=risk), size=1) + ylab("Risk") + ggtitle("Collision risk") + ylab("Probability")

results_summary <- sim_results_big %>%
	group_by(sim) %>%
	summarise(
		elasticity = elasticity[1],
		kessler = Kessler_ind[n()],
		ktime = min(which(Kessler_ind>=1))+2020,
	) %>%
	mutate(
		ktime_zero = which.min(elasticity^2)
	)

results_summary

elasticity_kessler <- ggplot(results_summary) +
	geom_point(aes(x=elasticity, y=ktime)) +
	geom_vline(aes(xintercept=0)) +
	theme_minimal() +
	ggtitle("elasticity vs kessler time")

timeseries_plot <- (sim_results_plot_launches / sim_results_plot_risk) | (sim_results_plot_sats / sim_results_plot_debs) | elasticity_kessler + plot_annotation(tag_levels="a") + plot_layout(guides='collect')

ggsave(
	filename=paste0("../images/elasticity-timeseries--",growth_rate_returns,"-",growth_rate_costs,".png"),
	plot=timeseries_plot,
	width=16,
	height=8,
	units="in",
	dpi=300
	)


st_negative <- ggplot(sim_results_big %>% filter(sim==20)) +
	geom_line(aes(x=year, y=sats)) +
	theme_bw() +
	ggtitle("elasticity < 0")

pit_negative <- ggplot(sim_results_big %>% filter(sim==20)) +
	geom_line(aes(x=year, y=pi_t)) +
	theme_bw()

st_positive <- ggplot(sim_results_big %>% filter(sim==40)) +
	geom_line(aes(x=year, y=sats)) +
	theme_bw() +
	ggtitle("elasticity > 0")

pit_positive <- ggplot(sim_results_big %>% filter(sim==40)) +
	geom_line(aes(x=year, y=pi_t)) +
	theme_bw()

pit_st_plot <- (st_negative / pit_negative) | (st_positive / pit_positive) | elasticity_kessler + plot_annotation(tag_levels="a")

ggsave(
	filename=paste0("../images/elasticity-summary--",growth_rate_returns,"-",growth_rate_costs,".png"),
	plot=pit_st_plot,
	width=16,
	height=8,
	units="in",
	dpi=300
	)


ktime_plot <- ggplot(data = results_summary) +
	geom_histogram(aes(x=ktime), binwidth=1) +
	geom_vline(aes(xintercept=ktime[ktime_zero])) +
	theme_bw() +
	labs(x="Kessler region entry time", y="Density") +
	theme(
		axis.text.x = element_text(size=15),
		axis.text.y = element_text(size=15)
	)

ggsave(
	filename=paste0("../images/kessler-time-hist--",growth_rate_returns,"-",growth_rate_costs,".png"),
	plot=ktime_plot,
	width=16,
	height=8,
	units="in",
	dpi=300
	)
